/*
This code generates the weights that we use to compute bootstrapped standard errors. We use the fractional (Bayesian) bootstrap and draw the weights from a Dirichlet distribution. 

The code is structured in 3 parts: 
- 	Part 1:  generate indicators for all the heterogeneity splits
- 	Part 2:  generate weights for ex-post analysis
- 	Part 3:  generate weights for ex-ante analysis
*/

* Part 1:  generate indicators for all the heterogeneity splits
********************************************************************************
********************************************************************************

* parent & hh chars
*-------------------------------------------------------------------------------
* Builds the carer-level characteristics used for the heterogeneity splits and
* saves them in the tempfile sample_chars (merged on carer_id in Parts 2 and 3).
use "$data/initial_data/sample_chars.dta", clear

* Education dummies: 1 when the level variable is 5 or above, 0 otherwise
* (a missing level also gives 0). The dummy is left missing when the level is
* coded 98 or when the companion variable (m115_0 or m103_0) is missing.
* NOTE(review): the meaning of codes 5 and 98 is not visible in this file;
* confirm against the questionnaire.
gen fathers_ed=(m116_0>=5 & m116_0<. ) if m116_0~=98 & m115_0<.
gen mothers_ed=(m104_0>=5 & m104_0<. ) if m104_0~=98 & m103_0<.

* Stata ranks missing above every number, so an unguarded >=15 would code a
* missing age as older==1. Leave older missing in that case so the row is
* treated like a missing value of any other split variable downstream.
gen older=age_daughter>=15 if age_daughter<.

keep carer_id scst dirtfloor workforcash market canread fathers_ed  mothers_ed married_child older
tempfile sample_chars
save `sample_chars'

* treatment
*-------------------------------------------------------------------------------
* Cluster-level treatment dummies, saved in the tempfile treat.
* For arm K (1 or 2) the dummy is 1 when treatment equals K, 0 when treatment
* equals 0, and missing for every other value of treatment.
use  "$data/initial_data/treatment.dta", clear
forvalues arm = 1/2 {
	generate treat`arm' = (treatment == `arm') if inlist(treatment, 0, `arm')
}
keep cluster_id treat?
tempfile treat
save `treat'

* realized trajectories of daughter(s)
*-------------------------------------------------------------------------------
* Carer-level versions of inschool16 and married17, saved in the tempfile obs.
use  "$data/created_data/Observed Trajectories (5-year FU) -- cleaned.dta", clear

* carer-level indicators built from the daughter-level records
*-------------------------------------------------------------------------------
* distribution of the raw variable, missing values included
tabulate inschool16, missing

* Average within carer, then turn the averages back into flags:
*   inschool16 becomes 0 whenever the within-carer mean is below 1
*   married17  becomes 1 whenever the within-carer mean is above 0
* A carer whose mean is missing keeps a missing flag (missing is never below 1,
* and it is excluded explicitly for married17).
* NOTE(review): this assumes both inputs are coded 0/1 -- confirm.
keep carer_id inschool16 married17
collapse (mean) inschool16 married17, by(carer_id)
replace married17  = 1 if married17 > 0 & !missing(married17)
replace inschool16 = 0 if inschool16 < 1

tempfile obs
save `obs'


* Part 2:  generate weights for ex-post analysis
********************************************************************************
********************************************************************************

* load data
*-------------------------------------------------------------------------------
use "$data/initial_data/expost_data", clear
destring _all, replace
gen exp_id=_n
rename Bride_Static_ round


* draw weights from a dirichlet distribution  (clustered at respondent level)
*-------------------------------------------------------------------------------
* One weight vector is drawn per row that survives the round==1 filter; the rows
* are replicated later, so every copy of a row carries the same weights.
* NOTE(review): this is respondent-level clustering only if round==1 leaves
* exactly one row per carer_id -- confirm.

set seed 10101

keep if round==1
gen w0=1 // uniformly weighted for the main estimate

count 
local N=r(N)
local nbs=500

* rdirichlet() is not defined in this file.
* NOTE(review): confirm it is available in the Mata environment used to run
* this do-file. getmata needs z to have one row per observation, and the code
* below uses w1-w500, i.e. one column per bootstrap draw. The draws are
* multiplied by N, the number of rows.
mata
A=J(`N', 1, 1)
z=`N'*rdirichlet(`nbs',A)'
end

* creates w1, w2, ... from the columns of z
* (the interactive browse of the weights that used to follow was removed: it
* only opens the Data Editor and gets in the way of unattended runs)
getmata w*=z, replace 

* merge in all the variables for the heterogeneity splits
*-------------------------------------------------------------------------------

* carer-level splits: first the tempfile sample_chars, then the tempfile obs;
* rows that exist only in the using file are discarded
foreach src in sample_chars obs {
	merge m:1 carer_id using ``src''
	drop if _merge == 2
	drop _merge
}

* cluster-level splits: rebuild cluster_id from carer_id (integer part of
* carer_id divided by one million), then attach the treatment dummies
capture drop cluster_id
generate cluster_id = floor(carer_id / 1000000)
codebook cluster_id

merge m:1 cluster_id using `treat'
drop if _merge == 2
drop _merge

* three copies of every row; round is rebuilt as a counter that restarts at 1
* within each carer_id
expand 3
drop round
bysort carer_id: generate round = _n


* export main weights
*-------------------------------------------------------------------------------
* Rows are written in carer_id, round order. The file holds columns w0 through
* w500 only, comma separated, without a header row.

sort carer_id round
outsheet w0-w500 using  "$data/created_data/xp_w.csv", replace comma nonames 

* record number of observations	for each of the heterogeneity splits
*-------------------------------------------------------------------------------
* For each split variable, print a label and then run unique on carer_id,
* first for the rows where the variable equals 1 and then where it equals 0.
* unique is not an official Stata command; it must be installed separately.

local splits inschool16 married17 mothers_ed fathers_ed scst dirtfloor married_child older treat1 treat2
foreach hetvar of varlist `splits' {

	display "`hetvar'==1"
	unique carer_id if `hetvar' == 1

	display "`hetvar'==0"
	unique carer_id if `hetvar' == 0

}


* export weights for each of the heterogeneity splits
*-------------------------------------------------------------------------------
* Two CSVs per split variable: the file ending in 1 holds the weights times the
* variable, the file ending in 0 holds the weights times one minus the variable.
* Rows keep the order the data currently have in memory.

local splits inschool16 married17 mothers_ed fathers_ed scst dirtfloor married_child older treat1 treat2
foreach hetvar of varlist `splits' {

	* start from a clean slate: remove the columns left by the previous split
	capture drop hw*

	forvalues b = 0/500 {
		generate hw1_`b' = w`b' * `hetvar'
		generate hw0_`b' = w`b' * (1 - `hetvar')
	}

	* a missing split variable makes both products missing; set them to zero
	recode hw* (.=0)

	* the wildcards pick up the columns in creation order, i.e. draw 0 to 500
	outsheet hw1_* using  "$data/created_data/xp_w_`hetvar'1.csv", replace comma nonames 
	outsheet hw0_* using  "$data/created_data/xp_w_`hetvar'0.csv", replace comma nonames 

}


* Part 3:  generate weights for ex-ante analysis
********************************************************************************
********************************************************************************

* load data
*-------------------------------------------------------------------------------
use "$data/initial_data/exante_data", clear

set seed 10101

* draw weights from a dirichlet distribution (clustered at respondent level)
*-------------------------------------------------------------------------------
* Reduce the data to one row per carer_id. n is the number of rows of that
* carer with a non-missing Bride_Dynamic_roster__id.
* NOTE(review): rows with a missing roster id are not counted, so the later
* expand would not restore them -- confirm the id is never missing.
collapse (count) n=Bride_Dynamic_roster__id, by(carer_id)

* w0 is the constant weight; w1, w2, ... come from the columns of z below
generate w0 = 1

count
local N = r(N)
local nbs = 500

* rdirichlet() is not defined in this file.
* NOTE(review): confirm it is available in the Mata environment used here.
mata
A = J(`N', 1, 1)
z = `N' * rdirichlet(`nbs', A)'
end

getmata w*=z, replace

* expand to observation level
*-------------------------------------------------------------------------------
* Replicate each carer row n times, so all copies share the carer's weights,
* and number the copies within carer_id in i.
expand n
bysort carer_id: generate i = _n

* merge in all the variables for the heterogeneity splits
*-------------------------------------------------------------------------------

* carer-level splits: first the tempfile sample_chars, then the tempfile obs;
* rows that exist only in the using file are discarded
foreach src in sample_chars obs {
	merge m:1 carer_id using ``src''
	drop if _merge == 2
	drop _merge
}

* cluster-level splits: rebuild cluster_id from carer_id (integer part of
* carer_id divided by one million), then attach the treatment dummies
capture drop cluster_id
generate cluster_id = floor(carer_id / 1000000)
codebook cluster_id

merge m:1 cluster_id using `treat'
drop if _merge == 2
drop _merge


* export main weights
*-------------------------------------------------------------------------------
* The CSV holds only w0-w500 (no id columns, no header row), so its rows can be
* matched back to the data by position alone. The preceding merge on cluster_id
* re-sorts the data on cluster_id and leaves the order of carers within a
* cluster arbitrary, so fix the row order explicitly before writing, as the
* ex-post export does with carer_id and round. carer_id and i together
* identify a row, which makes the order reproducible.
* NOTE(review): whatever reads this file must order its data the same way.

sort carer_id i

preserve
keep w0-w500
outsheet using  "$data/created_data/xa_w.csv", replace comma nonames 
restore


* record number of observations	for each of the heterogeneity splits
*-------------------------------------------------------------------------------
* For each split variable, print a label and then run unique on carer_id,
* first for the rows where the variable equals 1 and then where it equals 0.
* unique is not an official Stata command; it must be installed separately.

local splits inschool16 married17 mothers_ed fathers_ed scst dirtfloor married_child older treat1 treat2
foreach hetvar of varlist `splits' {

	display "`hetvar'==1"
	unique carer_id if `hetvar' == 1

	display "`hetvar'==0"
	unique carer_id if `hetvar' == 0

}

* export weights for each of the heterogeneity splits
*-------------------------------------------------------------------------------
* Two CSVs per split variable: the file ending in 1 holds the weights times the
* variable, the file ending in 0 holds the weights times one minus the variable.
* Rows keep the order the data currently have in memory.

local splits inschool16 married17 mothers_ed fathers_ed scst dirtfloor married_child older treat1 treat2
foreach hetvar of varlist `splits' {

	* start from a clean slate: remove the columns left by the previous split
	capture drop hw*

	forvalues b = 0/500 {
		generate hw1_`b' = w`b' * `hetvar'
		generate hw0_`b' = w`b' * (1 - `hetvar')
	}

	* a missing split variable makes both products missing; set them to zero
	recode hw* (.=0)

	* the wildcards pick up the columns in creation order, i.e. draw 0 to 500
	outsheet hw1_* using  "$data/created_data/xa_w_`hetvar'1.csv", replace comma nonames 
	outsheet hw0_* using  "$data/created_data/xa_w_`hetvar'0.csv", replace comma nonames 

}
